Last modified 05:25 PM on Feb 17, 2016. This document, R session image, version history, knitr cache, figures, and other associated datasets are located in /inside/grotto/blin/trna-markers/luad/predict/.
library(plyr)
library(glmnet)
library(ROCR)
library(GenomicRanges)
library(stringr)
library(reshape2)
set.seed(12)
load('/inside/home/blin/grotto/data/hg19-srnas.RData')
load('/inside/home/blin/grotto/trna-markers/process-reads/luad-counts.RData')
luad_clinical <- luad_clinical[match(colnames(luad_adjusted_counts), luad_clinical$barcode), ]
ntrials <- 200
source('predict.R')
buildTestCompareGlms <- function(metadata, covariate, counts, ntrials) {
# metadata: filtered metadata for covariate of interest
# counts: filtered counts containing only sample with covariate of interest
# set up feature sets
load('/inside/home/blin/grotto/data/hg19-srnas.RData')
tsrnas <- unique(srnas[srnas$class %in% c("fivehalf", "threehalf", "trailer")]$tx_name)
mirnas <- unique(srnas[str_detect(srnas$class, "mi")]$tx_name)
snornas <- unique(srnas[srnas$class == "snoRNA"]$tx_name)
pirnas <- unique(srnas[srnas$class == "piRNA"]$tx_name)
# create training/testing sets and feature sets
system(paste0('echo Setting up training/testing sets'))
datasets <- lapply(1:ntrials, function(i) setupTrainingTestingSets(metadata, covariate, counts))
# build, test on scrambled data (control)
system(paste0('echo Building/testing models - control'))
control_tsrna_glms <- lapply(datasets, buildTestGlm, features = tsrnas, covariate = covariate, randomize = TRUE) # the same models are built twice, but the code is a lot cleaner this way
control_mirna_glms <- lapply(datasets, buildTestGlm, features = mirnas, covariate = covariate, randomize = TRUE)
control_feature_glms <- list(control_tsrna_glms, control_mirna_glms)
control_roc <- ldply(mapply(parseGlms, glms = control_feature_glms, class = c("tsRNA, permuted", "miRNA, permuted"), SIMPLIFY = FALSE), identity)
# build and predict testing data
system(paste0('echo Building/testing models'))
all_feature_glms <- lapply(list(tsrnas, mirnas, snornas, pirnas), function(features) {
lapply(datasets, buildTestGlm, features = features, covariate = covariate, randomize = FALSE)
})
roc <- ldply(mapply(parseGlms, glms = all_feature_glms, class = c("tsRNA", "miRNA", "snoRNA", "piRNA"), SIMPLIFY = FALSE), identity)
roc <- rbind(roc, control_roc)
roc$Class <- factor(roc$Class, levels = c("tsRNA", "miRNA", "tsRNA, permuted", "snoRNA", "piRNA", "miRNA, permuted"))
# create plots
system(paste0('echo Creating summary plot'))
plot <- plotGlms(roc)
# create summary object
summary <- list()
summary$glms <- all_feature_glms
summary$control_glms <- control_feature_glms
summary$roc <- roc
summary$plot <- plot
summary$samples <- lapply(datasets, function(dataset) unname(dataset$participant_ids))
summary
}
metadata <- luad_clinical metadata$sample_type <- as.factor(metadata$sample_type) sample_type <- buildTestCompareGlms(metadata = metadata, covariate = "sample_type", counts = luad_adjusted_counts, ntrials = ntrials)
sample_type$plot
plotAucs(sample_type$glms)
metadata <- luad_clinical[luad_clinical$m_stage %in% c("M0", "M1", "M1a", "M1b"), ]
counts <- luad_adjusted_counts[, colnames(luad_adjusted_counts) %in% metadata$barcode]
metadata$metastasis <- as.factor(ifelse(metadata$m_stage == "M0", "No metastasis" , "Metastasis"))
metastasis <- buildTestCompareGlms(metadata = metadata, covariate = "metastasis", counts = counts, ntrials = ntrials)
metastasis$plot
plotAucs(metastasis$glms)
# stage I, IA vs stage IIIA, IIIB, IV - essentially spreading vs non-spreading
metadata <- luad_clinical[luad_clinical$stage %in% c("Stage I", "Stage IA", "Stage IIIA", "Stage IIIB", "Stage IV"), ]
counts <- luad_adjusted_counts[, colnames(luad_adjusted_counts) %in% metadata$barcode]
metadata$stage <- as.factor(ifelse(metadata$stage %in% c("Stage I", "Stage IA"), "Early stage" , "Late stage"))
stage_coarse <- buildTestCompareGlms(metadata = metadata, covariate = "stage", counts = counts, ntrials = ntrials)
stage_coarse$plot
plotAucs(stage_coarse$glms)
metadata <- luad_clinical[luad_clinical$n_stage %in% c("N0", "N1", "N2", "N3"), ]
counts <- luad_adjusted_counts[, colnames(luad_adjusted_counts) %in% metadata$barcode]
metadata$n_stage <- as.factor(ifelse(metadata$n_stage %in% c("N0"), "N0" , "N1+"))
lymph_spread <- buildTestCompareGlms(metadata = metadata, covariate = "n_stage", counts = counts, ntrials = ntrials)
lymph_spread$plot
plotAucs(lymph_spread$glms)
tsrna_coefs <- c(extractCoefs(sample_type$glms[[1]], ntrials), extractCoefs(stage_coarse$glms[[1]], ntrials))
tsrna_coefs <- data.frame(table(names(tsrna_coefs)))
tsrna_coefs <- tsrna_coefs[order(tsrna_coefs$Freq, decreasing = TRUE), ]
ggplot(tsrna_coefs) + geom_histogram(aes(x = Freq)) + xlab(paste("No. times tsRNA selected in", ntrials, "GLMs"))
mirna_coefs <- c(extractCoefs(sample_type$glms[[2]], ntrials), extractCoefs(stage_coarse$glms[[2]], ntrials))
mirna_coefs <- data.frame(table(names(mirna_coefs)))
mirna_coefs <- mirna_coefs[order(mirna_coefs$Freq, decreasing = TRUE), ]
ggplot(mirna_coefs) + geom_histogram(aes(x = Freq)) + xlab(paste("No. times miRNA selected in", ntrials, "GLMs"))
save(tsrna_coefs, mirna_coefs, file = "coefs.RData")
save.session("predict.RSession")
## Saving search path.. ## Saving list of loaded packages.. ## Saving all data... ## Done.